{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 3: Parallel ETL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext sql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from time import time\n",
    "import configparser\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STEP 1: Get the params of the created redshift cluster \n",
    "- We need:\n",
    "    - The redshift cluster <font color='red'>endpoint</font>\n",
    "    - The <font color='red'>IAM role ARN</font> that give access to Redshift to read from S3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "config = configparser.ConfigParser()\n",
    "config.read_file(open('dwh.cfg'))\n",
    "KEY=config.get('AWS','key')\n",
    "SECRET= config.get('AWS','secret')\n",
    "\n",
    "DWH_DB= config.get(\"DWH\",\"DWH_DB\")\n",
    "DWH_DB_USER= config.get(\"DWH\",\"DWH_DB_USER\")\n",
    "DWH_DB_PASSWORD= config.get(\"DWH\",\"DWH_DB_PASSWORD\")\n",
    "DWH_PORT = config.get(\"DWH\",\"DWH_PORT\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# FILL IN THE REDSHIFT ENPOINT HERE\n",
    "# e.g. DWH_ENDPOINT=\"redshift-cluster-1.csmamz5zxmle.us-west-2.redshift.amazonaws.com\" \n",
    "DWH_ENDPOINT=\"\" \n",
    "    \n",
    "#FILL IN THE IAM ROLE ARN you got in step 2.2 of the previous exercise\n",
    "#e.g DWH_ROLE_ARN=\"arn:aws:iam::988332130976:role/dwhRole\"\n",
    "DWH_ROLE_ARN=\"\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STEP 2: Connect to the Redshift Cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "conn_string=\"postgresql://{}:{}@{}:{}/{}\".format(DWH_DB_USER, DWH_DB_PASSWORD, DWH_ENDPOINT, DWH_PORT,DWH_DB)\n",
    "print(conn_string)\n",
    "%sql $conn_string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "\n",
    "s3 = boto3.resource('s3',\n",
    "                       region_name=\"us-west-2\",\n",
    "                       aws_access_key_id=KEY,\n",
    "                       aws_secret_access_key=SECRET\n",
    "                     )\n",
    "\n",
    "sampleDbBucket =  s3.Bucket(\"udacity-labs\")\n",
    "\n",
    "for obj in sampleDbBucket.objects.filter(Prefix=\"tickets\"):\n",
    "    print(obj)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STEP 3: Create Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%sql \n",
    "DROP TABLE IF EXISTS \"sporting_event_ticket\";\n",
    "CREATE TABLE \"sporting_event_ticket\" (\n",
    "    \"id\" double precision DEFAULT nextval('sporting_event_ticket_seq') NOT NULL,\n",
    "    \"sporting_event_id\" double precision NOT NULL,\n",
    "    \"sport_location_id\" double precision NOT NULL,\n",
    "    \"seat_level\" numeric(1,0) NOT NULL,\n",
    "    \"seat_section\" character varying(15) NOT NULL,\n",
    "    \"seat_row\" character varying(10) NOT NULL,\n",
    "    \"seat\" character varying(10) NOT NULL,\n",
    "    \"ticketholder_id\" double precision,\n",
    "    \"ticket_price\" numeric(8,2) NOT NULL\n",
    ");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STEP 4: Load Partitioned data into the cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "qry = \"\"\"\n",
    "    copy sporting_event_ticket from 's3://udacity-labs/tickets/split/part'\n",
    "    credentials 'aws_iam_role={}'\n",
    "    gzip delimiter ';' compupdate off region 'us-west-2';\n",
    "\"\"\".format(DWH_ROLE_ARN)\n",
    "\n",
    "%sql $qry"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STEP 4: Create Tables for the non-partitioned data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%sql\n",
    "DROP TABLE IF EXISTS \"sporting_event_ticket_full\";\n",
    "CREATE TABLE \"sporting_event_ticket_full\" (\n",
    "    \"id\" double precision DEFAULT nextval('sporting_event_ticket_seq') NOT NULL,\n",
    "    \"sporting_event_id\" double precision NOT NULL,\n",
    "    \"sport_location_id\" double precision NOT NULL,\n",
    "    \"seat_level\" numeric(1,0) NOT NULL,\n",
    "    \"seat_section\" character varying(15) NOT NULL,\n",
    "    \"seat_row\" character varying(10) NOT NULL,\n",
    "    \"seat\" character varying(10) NOT NULL,\n",
    "    \"ticketholder_id\" double precision,\n",
    "    \"ticket_price\" numeric(8,2) NOT NULL\n",
    ");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STEP 5: Load non-partitioned data into the cluster\n",
    "- Note how it's slower than loading partitioned data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "qry = \"\"\"\n",
    "    copy sporting_event_ticket_full from 's3://udacity-labs/tickets/full/full.csv.gz' \n",
    "    credentials 'aws_iam_role={}' \n",
    "    gzip delimiter ';' compupdate off region 'us-west-2';\n",
    "\"\"\".format(DWH_ROLE_ARN)\n",
    "\n",
    "%sql $qry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
